# Code
library(tidyverse)
library(arrow)
library(splines)
library(corrr)
library(scales)
library(FactoMineR)
library(factoextra)
library(glue)
theme_set(theme_bw() + theme(legend.position = "top"))
# Exploratory Data Analysis 1
library(tidyverse)
library(arrow)
library(splines)
library(corrr)
library(scales)
library(FactoMineR)
library(factoextra)
library(glue)
theme_set(theme_bw() + theme(legend.position = "top"))

# NOTE(review): the two paragraphs below are narrative text that had been
# fused onto the surrounding code lines. They are kept here as comments
# (translated to English); the beginning of each paragraph is missing.
#
# [...] `country`, which holds the country. I do this in order to evaluate
# behaviour patterns across different countries, and because it could help
# when extrapolating to regions where we have no historical bloom records.
# To tell the NPN data for the USA apart, I label them in `country` as
# USA-NPN, and the Washington DC data as USA-WDC.
#
# [...] USA-WDC because it is only a single coordinate (lat=38.88535,
# long=-77.03863); the USA-NPN results are not shown either, because the
# bioclimatic variables are not available for the USA-NPN coordinates.
# However, since climate information was supplied by NPN, I use it for
# exploratory analysis at the end of this document and also in the next one
# (06-EDA2.qmd).

# Load the full data set and derive `country` from substrings of `location`.
# Rows whose location matches none of the patterns get `country = NA`
# (case_when's default).
df_full <- read_parquet("../external-data/df_full.parquet") |>
  mutate(
    country = case_when(
      str_detect(location, "Japan") ~ "Japan",
      str_detect(location, "kyoto") ~ "Japan",
      str_detect(location, "liestal") ~ "Switzerland",
      str_detect(location, "Switzerland") ~ "Switzerland",
      str_detect(location, "South Korea") ~ "South Korea",
      str_detect(location, "vancouver") ~ "Canada",
      str_detect(location, "washingtondc") ~ "USA-WDC",
      str_detect(location, "NPN") ~ "USA-NPN"
    )
  ) |>
  # Put the two identifying columns first.
  relocate(location, country, everything())

# Distribution of bloom day-of-year per country, original scale.
# NOTE(review): this drops only rows whose location is exactly "NPN", while
# `country` above is assigned with a substring match on "NPN" -- confirm the
# two agree for this data set.
df_full |>
  filter(location != "NPN") |>
  ggplot(aes(x = bloom_doy)) +
  facet_wrap(~country, ncol = 1, scales = "free_y") +
  geom_histogram(color = "black") +
  geom_rug() +
  labs(
    x = "Bloom DOY",
    y = "Count",
    title = "DOY distribution by country",
    subtitle = "Original scale"
  )
# Distribution of bloom day-of-year per country, log10 x axis.
df_full |>
  filter(location != "NPN") |>
  ggplot(aes(x = bloom_doy)) +
  facet_wrap(~country, ncol = 1, scales = "free_y") +
  geom_histogram(color = "black") +
  geom_rug() +
  scale_x_log10() +
  labs(
    x = "Bloom DOY",
    y = "Count",
    title = "DOY distribution by country",
    subtitle = "Logarithmic scale"
  )

# Draw bloom DOY against every remaining covariate, one facet per covariate.
#
# data:  rows of `df_full` already restricted to the subset of interest.
# title: plot title.
#
# Returns a ggplot object. Each facet shows the raw points, a 2-d binned
# density raster, and a natural-spline (df = 2) smooth fitted with
# method = "gam"; the x axis is log10.
plot_doy_vs_covariates <- function(data, title) {
  data |>
    # Everything except bloom_doy becomes a (name, value) pair; the dropped
    # columns are identifiers / dates, not covariates.
    select(-c(location, country, year, bloom_date)) |>
    pivot_longer(cols = -bloom_doy) |>
    ggplot(aes(x = value, y = bloom_doy)) +
    facet_wrap(~name, scales = "free", ncol = 4) +
    geom_point(size = 0.5, alpha = 0.25) +
    # after_stat() replaces the deprecated `..density..` notation. The former
    # `contour = FALSE` argument is not a stat_bin_2d() parameter and was
    # only producing an "unknown parameters" warning, so it is dropped.
    stat_bin_2d(
      aes(fill = after_stat(density)),
      geom = "raster",
      show.legend = FALSE
    ) +
    scale_fill_distiller(palette = 4, direction = -1) +
    geom_smooth(
      method = "gam",
      formula = y ~ ns(x, df = 2),
      color = "firebrick3",
      se = FALSE
    ) +
    scale_x_log10() +
    labs(y = "DOY", x = "", title = title)
}

# Draw the Spearman correlation of every numeric column with bloom_doy as a
# single strip of tiles, ordered by correlation.
#
# data:  rows of `df_full` already restricted to the subset of interest.
# title: plot title.
#
# Returns a ggplot object.
plot_doy_correlations <- function(data, title) {
  # Shorter display names for the two human-footprint layers.
  short_names <- c(
    "wildareas-v3-1993-human-footprint" = "human-footprint93",
    "wildareas-v3-2009-human-footprint" = "human-footprint09"
  )

  data |>
    select(where(is.numeric), -c(shrubs)) |>
    correlate(method = "spearman") |>
    # Keep only the row holding correlations against bloom_doy.
    filter(term == "bloom_doy") |>
    pivot_longer(cols = -term) |>
    filter(!is.na(value)) |>
    mutate(name = str_replace_all(name, short_names)) |>
    ggplot(aes(x = reorder(name, value), y = term, fill = value)) +
    # A single tile layer: the Switzerland and South Korea copies used to add
    # geom_tile() twice, drawing identical tiles on top of each other.
    geom_tile() +
    scale_fill_gradient2(
      low = muted("red"),
      mid = "white",
      high = muted("dodgerblue2"),
      midpoint = 0
    ) +
    theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
    labs(x = "", y = "", fill = "", title = title) +
    coord_flip()
}

# DOY vs. covariates ----
df_full |>
  filter(country == "Japan") |>
  plot_doy_vs_covariates(title = "Japan")

df_full |>
  filter(country == "Switzerland") |>
  plot_doy_vs_covariates(title = "Switzerland")

df_full |>
  filter(country == "South Korea") |>
  plot_doy_vs_covariates(title = "South Korea")

df_full |>
  filter(location != "NPN") |>
  plot_doy_vs_covariates(title = "All the countries")

# Spearman correlation with DOY ----
df_full |>
  filter(country == "Japan") |>
  plot_doy_correlations(title = "Japan")

df_full |>
  filter(country == "Switzerland") |>
  plot_doy_correlations(title = "Switzerland")

df_full |>
  filter(country == "South Korea") |>
  plot_doy_correlations(title = "South Korea")
# Spearman correlation of every numeric column with bloom_doy, all countries
# pooled (rows with location "NPN" excluded).
df_full |>
  filter(location != "NPN") |>
  select(where(is.numeric), -c(shrubs)) |>
  correlate(method = "spearman") |>
  filter(term == "bloom_doy") |>
  pivot_longer(cols = -term) |>
  filter(!is.na(value)) |>
  mutate(
    name = str_replace_all(
      name,
      "wildareas-v3-1993-human-footprint",
      "human-footprint93"
    ),
    name = str_replace_all(
      name,
      "wildareas-v3-2009-human-footprint",
      "human-footprint09"
    )
  ) |>
  ggplot(aes(x = reorder(name, value), y = term, fill = value)) +
  # One tile layer only; the duplicated geom_tile() call was redundant.
  geom_tile() +
  scale_fill_gradient2(
    low = muted("red"),
    mid = "white",
    high = muted("dodgerblue2"),
    midpoint = 0
  ) +
  theme(
    axis.text.x = element_text(angle = 35, hjust = 1),
    legend.key.size = unit(0.85, "cm")
  ) +
  labs(x = "", y = "", fill = "", title = "All the countries") +
  coord_flip()

# NOTE(review): narrative fragment that had been fused onto the code above,
# kept as a comment (translated); its beginning is missing:
# [...] bloom_doy. [...] bloom_doy (since this variable does change over
# time).

# Scatter plot of two site-level covariates for one country, coloured by the
# per-site median bloom DOY.
#
# data:         data frame with the columns of `df_full`.
# site_country: value of `country` to keep; also used as the plot title.
# x_var, y_var: unquoted column names for the axes.
# x_lab, y_lab: axis labels.
# log_x:        if TRUE, put the x axis on a log10 scale.
#
# Returns a ggplot object.
plot_site_scatter <- function(data, site_country, x_var, y_var,
                              x_lab, y_lab, log_x = FALSE) {
  site_plot <- data |>
    filter(country == site_country) |>
    # One row per coordinate, carrying the median DOY of that coordinate.
    group_by(lat, long) |>
    mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
    ungroup() |>
    distinct(lat, long, .keep_all = TRUE) |>
    # Colour by the median: after distinct() the surviving `bloom_doy` is
    # just the first record of each site, which is what was mapped before
    # even though the legend is titled "DOY (median)".
    ggplot(aes(x = {{ x_var }}, y = {{ y_var }}, color = median_doy)) +
    geom_point() +
    scale_color_viridis_c(
      trans = "log10",
      breaks = trans_breaks(
        trans = "log10",
        inv = function(p) round(10^p, digits = 1)
      )
    ) +
    labs(
      x = x_lab,
      y = y_lab,
      color = "DOY (median)",
      title = site_country
    ) +
    theme(
      legend.key.size = unit(1, "cm"),
      legend.key.width = unit(2, "cm")
    ) +
    geom_smooth(
      method = "gam",
      formula = y ~ ns(x, df = 2),
      color = "red",
      size = 0.5
    )

  if (log_x) {
    site_plot <- site_plot + scale_x_log10()
  }
  site_plot
}

# Axis labels shared by the plots below.
lab_temp <- "Annual mean temperature (°C)"
lab_precip <- "Annual precipitation (mm)"
lab_nitrogen <- "Nitrogen (cg/kg)"
lab_soc <- "Soil organic carbon (dg/kg)"
lab_bdod <- "Bulk density (cg/cm³)"
lab_alt <- "Altitude"

# Subsets with strictly positive soil values, used by some plots below.
sites_soc <- df_full |> filter(`soc_0-5cm_mean` > 0)
sites_bdod <- df_full |> filter(`bdod_0-5cm_mean` > 0)
sites_nitrogen <- df_full |> filter(`nitrogen_0-5cm_mean` > 0)

# NOTE(review): from here on the Japan and Switzerland plots use a log10 x
# axis while the South Korea ones stay linear; that difference is preserved
# as found -- confirm it is intentional. Also note that a log10 axis on
# temperature or altitude cannot show sites with values <= 0.

# Temperature vs. precipitation (linear x axis for all three) ----
plot_site_scatter(df_full, "Japan", bio1, bio12, lab_temp, lab_precip)
plot_site_scatter(df_full, "Switzerland", bio1, bio12, lab_temp, lab_precip)
plot_site_scatter(df_full, "South Korea", bio1, bio12, lab_temp, lab_precip)

# Temperature vs. nitrogen ----
plot_site_scatter(
  df_full, "Japan", bio1, `nitrogen_0-5cm_mean`,
  lab_temp, lab_nitrogen,
  log_x = TRUE
)
plot_site_scatter(
  df_full, "Switzerland", bio1, `nitrogen_0-5cm_mean`,
  lab_temp, lab_nitrogen,
  log_x = TRUE
)
plot_site_scatter(
  df_full, "South Korea", bio1, `nitrogen_0-5cm_mean`,
  lab_temp, lab_nitrogen
)

# Temperature vs. soil organic carbon (sites with soc > 0) ----
plot_site_scatter(
  sites_soc, "Japan", bio1, `soc_0-5cm_mean`,
  lab_temp, lab_soc,
  log_x = TRUE
)
plot_site_scatter(
  sites_soc, "Switzerland", bio1, `soc_0-5cm_mean`,
  lab_temp, lab_soc,
  log_x = TRUE
)
plot_site_scatter(
  sites_soc, "South Korea", bio1, `soc_0-5cm_mean`,
  lab_temp, lab_soc
)

# Temperature vs. bulk density (sites with bdod > 0) ----
plot_site_scatter(
  sites_bdod, "Japan", bio1, `bdod_0-5cm_mean`,
  lab_temp, lab_bdod,
  log_x = TRUE
)
plot_site_scatter(
  sites_bdod, "Switzerland", bio1, `bdod_0-5cm_mean`,
  lab_temp, lab_bdod,
  log_x = TRUE
)
plot_site_scatter(
  sites_bdod, "South Korea", bio1, `bdod_0-5cm_mean`,
  lab_temp, lab_bdod
)

# Precipitation vs. nitrogen (sites with nitrogen > 0) ----
plot_site_scatter(
  sites_nitrogen, "Japan", bio12, `nitrogen_0-5cm_mean`,
  lab_precip, lab_nitrogen,
  log_x = TRUE
)
plot_site_scatter(
  sites_nitrogen, "Switzerland", bio12, `nitrogen_0-5cm_mean`,
  lab_precip, lab_nitrogen,
  log_x = TRUE
)
plot_site_scatter(
  sites_nitrogen, "South Korea", bio12, `nitrogen_0-5cm_mean`,
  lab_precip, lab_nitrogen
)

# Altitude vs. precipitation ----
# The Switzerland x label used to read "Altitude)" (stray parenthesis).
plot_site_scatter(
  df_full, "Japan", alt, bio12,
  lab_alt, lab_precip,
  log_x = TRUE
)
plot_site_scatter(
  df_full, "Switzerland", alt, bio12,
  lab_alt, lab_precip,
  log_x = TRUE
)
plot_site_scatter(df_full, "South Korea", alt, bio12, lab_alt, lab_precip)

# Countries analysed one by one in the sections that follow.
countries <- c("Japan", "Switzerland", "South Korea")
# Plot the per-site median bloom DOY against the product of two covariates
# for one country.
#
# data:         data frame with the columns of `df_full`.
# site_country: value of `country` to keep; also used as the plot title.
# interaction:  unquoted expression evaluated on the per-site rows, e.g.
#               `bio1 * bio12`; it becomes the x variable.
# x_lab:        x axis label.
#
# Returns a ggplot object with a log10 x axis and a natural-spline (df = 2)
# smooth fitted with method = "gam".
plot_doy_vs_interaction <- function(data, site_country, interaction, x_lab) {
  data |>
    filter(country == site_country) |>
    # One row per coordinate, carrying the median DOY of that coordinate.
    group_by(lat, long) |>
    mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
    ungroup() |>
    distinct(lat, long, .keep_all = TRUE) |>
    mutate(var_inter = {{ interaction }}) |>
    # Plot the median: it was computed above but never used, and the
    # `bloom_doy` left after distinct() is only the first record per site.
    ggplot(aes(x = var_inter, y = median_doy)) +
    geom_point() +
    scale_x_log10() +
    geom_smooth(
      method = "gam",
      formula = y ~ ns(x, df = 2),
      color = "red",
      size = 0.5
    ) +
    labs(x = x_lab, y = "DOY", title = site_country)
}

# Temperature x precipitation ----
inter_lab <- "Annual mean temperature (°C) * Annual precipitation (mm)"
plot_doy_vs_interaction(df_full, countries[1], bio1 * bio12, inter_lab)
plot_doy_vs_interaction(df_full, countries[2], bio1 * bio12, inter_lab)
plot_doy_vs_interaction(df_full, countries[3], bio1 * bio12, inter_lab)

# Temperature x nitrogen ----
inter_lab <- "Annual mean temperature (°C) * Nitrogen (cg/kg)"
plot_doy_vs_interaction(
  df_full, countries[1], bio1 * `nitrogen_0-5cm_mean`, inter_lab
)
plot_doy_vs_interaction(
  df_full, countries[2], bio1 * `nitrogen_0-5cm_mean`, inter_lab
)
plot_doy_vs_interaction(
  df_full, countries[3], bio1 * `nitrogen_0-5cm_mean`, inter_lab
)

# Temperature x soil organic carbon ----
inter_lab <- "Annual mean temperature (°C) * Soil organic carbon (dg/kg)"
plot_doy_vs_interaction(
  df_full, countries[1], bio1 * `soc_0-5cm_mean`, inter_lab
)
plot_doy_vs_interaction(
  df_full, countries[2], bio1 * `soc_0-5cm_mean`, inter_lab
)
plot_doy_vs_interaction(
  df_full, countries[3], bio1 * `soc_0-5cm_mean`, inter_lab
)

# Temperature x bulk density ----
inter_lab <- "Annual mean temperature (°C) * Bulk density (cg/cm³)"
plot_doy_vs_interaction(
  df_full, countries[1], bio1 * `bdod_0-5cm_mean`, inter_lab
)
plot_doy_vs_interaction(
  df_full, countries[2], bio1 * `bdod_0-5cm_mean`, inter_lab
)
plot_doy_vs_interaction(
  df_full, countries[3], bio1 * `bdod_0-5cm_mean`, inter_lab
)

# Precipitation x nitrogen ----
inter_lab <- "Annual precipitation (mm) * Nitrogen (cg/kg)"
plot_doy_vs_interaction(
  df_full, countries[1], bio12 * `nitrogen_0-5cm_mean`, inter_lab
)
plot_doy_vs_interaction(
  df_full, countries[2], bio12 * `nitrogen_0-5cm_mean`, inter_lab
)
plot_doy_vs_interaction(
  df_full, countries[3], bio12 * `nitrogen_0-5cm_mean`, inter_lab
)

# Altitude x precipitation ----
inter_lab <- "Altitude * Annual precipitation (mm) "
plot_doy_vs_interaction(df_full, countries[1], alt * bio12, inter_lab)
plot_doy_vs_interaction(df_full, countries[2], alt * bio12, inter_lab)
plot_doy_vs_interaction(df_full, countries[3], alt * bio12, inter_lab)

# NOTE(review): the list below is narrative text that had been fused onto
# the code, kept as comments (translated); the last sentence is missing its
# beginning.
#
# Q1: bloom_doy values below the first quartile: DOY < Q1.
# Q2: bloom_doy values greater than or equal to the first quartile and below
#     the second quartile: Q1 <= DOY < Q2.
# Q3: bloom_doy values greater than or equal to the second quartile and
#     below the third quartile: Q2 <= DOY < Q3.
# Q4: bloom_doy values greater than or equal to the third quartile:
#     DOY >= Q3.
# [...] doy_categ is introduced into the PCA as a qualitative supplementary
# variable.

# All records for the first country (Japan); input to its PCA.
df_pca_japan1 <-
  df_full |>
  filter(country == countries[1])
# Labels (and ordering) of the four DOY quartile categories.
order_categ <-
  c("DOY < Q1",
    "Q1 <= DOY < Q2",
    "Q2 <= DOY < Q3",
    "DOY >= Q3")

# Build the PCA input table for one country: one row per coordinate, with the
# quartile category of its median DOY as the first column.
#
# country_data: all records of a single country (columns of `df_full`).
# categ_levels: the four category labels, in order.
#
# Returns a tibble whose first column is the factor `doy_categ`; `shrubs`,
# the identifier/date columns and both DOY columns are removed.
#
# The quartile cut points are local to this function. They used to live in
# globals (`value_q1`..`value_q3`) that were overwritten for every country.
# NOTE(review): the cut points come from the individual bloom_doy records,
# while the categories are assigned to per-site medians -- confirm that this
# is the intended comparison.
build_pca_input <- function(country_data, categ_levels) {
  cuts <- quantile(
    country_data$bloom_doy,
    probs = c(0.25, 0.50, 0.75),
    na.rm = TRUE
  )
  q1 <- cuts[[1]]
  q2 <- cuts[[2]]
  q3 <- cuts[[3]]

  country_data |>
    group_by(lat, long) |>
    mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
    ungroup() |>
    distinct(lat, long, .keep_all = TRUE) |>
    select(-shrubs) |>
    mutate(
      doy_categ = case_when(
        median_doy < q1 ~ "DOY < Q1",
        median_doy >= q1 & median_doy < q2 ~ "Q1 <= DOY < Q2",
        median_doy >= q2 & median_doy < q3 ~ "Q2 <= DOY < Q3",
        median_doy >= q3 ~ "DOY >= Q3"
      ),
      doy_categ = factor(doy_categ, levels = categ_levels)
    ) |>
    # NOTE(review): every column left after this select (lat, long and year
    # among them) enters the PCA as an active variable -- confirm that is
    # intended.
    select(-c(location, country, bloom_date, bloom_doy, median_doy)) |>
    # doy_categ must be column 1: the PCA calls below use quali.sup = 1.
    relocate(doy_categ, everything())
}

# Append the individuals' coordinates on the first three PCA dimensions as
# columns pc1, pc2 and pc3, and return the augmented table.
add_pca_scores <- function(pca_input, pca_fit) {
  pca_input$pc1 <- pca_fit$ind$coord[, 1]
  pca_input$pc2 <- pca_fit$ind$coord[, 2]
  pca_input$pc3 <- pca_fit$ind$coord[, 3]
  pca_input
}

# Scatter of the sites on dimension 1 vs. dimension `dim_y` (2 or 3),
# coloured by DOY category.
#
# scores:  table returned by add_pca_scores().
# pca_fit: the PCA() result the scores came from; column 2 of its `eig`
#          table supplies the percentages shown in the axis labels, so the
#          labels always belong to the fit being plotted.
# dim_y:   dimension on the y axis.
#
# Returns a ggplot object.
plot_pca_sites <- function(scores, pca_fit, dim_y) {
  pct <- round(pca_fit$eig[, 2], digits = 1)
  pct_x <- pct[[1]]
  pct_y <- pct[[dim_y]]
  y_col <- paste0("pc", dim_y)

  scores |>
    ggplot(aes(x = pc1, y = .data[[y_col]], color = doy_categ)) +
    geom_point(size = 2.5, alpha = 0.75, shape = 18) +
    geom_hline(yintercept = 0, lty = 2, color = "firebrick3") +
    geom_vline(xintercept = 0, lty = 2, color = "firebrick3") +
    # Only the two extreme categories are coloured; the middle two are grey.
    scale_color_manual(
      values = c("dodgerblue3", "gray80", "gray80", "forestgreen")
    ) +
    labs(
      x = glue("Dim1 ({pct_x}%)"),
      y = glue("Dim{dim_y} ({pct_y}%)"),
      color = ""
    )
}

# Japan ----
df_pca_japan2 <- build_pca_input(df_pca_japan1, order_categ)
pca_japan <- PCA(X = df_pca_japan2, graph = FALSE, quali.sup = 1)
df_pca_japan2 <- add_pca_scores(df_pca_japan2, pca_japan)

fviz_pca_var(pca_japan, axes = c(1, 2), select.var = list(contrib = 15))
plot_pca_sites(df_pca_japan2, pca_japan, dim_y = 2)
fviz_pca_var(pca_japan, axes = c(1, 3), select.var = list(contrib = 15))
plot_pca_sites(df_pca_japan2, pca_japan, dim_y = 3)

# Switzerland ----
df_pca_switzerland1 <-
  df_full |>
  filter(country == countries[2])
df_pca_switzerland2 <- build_pca_input(df_pca_switzerland1, order_categ)
pca_switzerland <-
  PCA(X = df_pca_switzerland2, graph = FALSE, quali.sup = 1)
df_pca_switzerland2 <- add_pca_scores(df_pca_switzerland2, pca_switzerland)

fviz_pca_var(
  pca_switzerland,
  axes = c(1, 2),
  select.var = list(contrib = 15)
)
plot_pca_sites(df_pca_switzerland2, pca_switzerland, dim_y = 2)
fviz_pca_var(
  pca_switzerland,
  axes = c(1, 3),
  select.var = list(contrib = 15)
)
plot_pca_sites(df_pca_switzerland2, pca_switzerland, dim_y = 3)

# South Korea ----
df_pca_southk1 <-
  df_full |>
  filter(country == countries[3])
df_pca_southk2 <- build_pca_input(df_pca_southk1, order_categ)
pca_southk <- PCA(X = df_pca_southk2, graph = FALSE, quali.sup = 1)
df_pca_southk2 <- add_pca_scores(df_pca_southk2, pca_southk)

fviz_pca_var(pca_southk, axes = c(1, 2), select.var = list(contrib = 15))
plot_pca_sites(df_pca_southk2, pca_southk, dim_y = 2)
fviz_pca_var(pca_southk, axes = c(1, 3), select.var = list(contrib = 15))
plot_pca_sites(df_pca_southk2, pca_southk, dim_y = 3)